import logging
from init import PATHS
LOGGER = logging.getLogger(__name__)
import pandas as pd


def main():
    """Collect the first round of OEM subsidiaries and write them to CSV.

    Reads ``OEMs.csv`` from ``PATHS.marklines``, takes the distinct
    ``Level2_bvdid`` values, derives the sorted set of their two-character
    prefixes (used as the country part of the links file names), runs
    ``get_subsidiaries_from_bvdidlist`` over every entry of the year list,
    and writes the result to
    ``Data_outputted/A_AutoIndustry/subsidiaries.csv`` under ``PATHS.dropbox``.
    """
    LOGGER.info('BEGIN')
    # Import OEMs.csv
    OEM = pd.read_csv(PATHS.marklines / 'OEMs.csv', dtype={'Level2_bvdid':str,'year':int})
    # Year list; 'current' is passed through like a year because it is only
    # used to build file names and to fill the output 'year' column.
    list_years = [2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,'current']
    # Distinct OEM BvD IDs. Missing IDs are dropped: with dtype=str an empty
    # cell is still read as a float NaN, which cannot be sliced below.
    OEM_bvdid = OEM['Level2_bvdid'].dropna().unique().tolist()
    # Two-character prefix of each ID, deduplicated and sorted.
    # NOTE(review): presumably the ISO country code of the BvD ID -- confirm.
    list_countries1 = sorted({bvdid[0:2] for bvdid in OEM_bvdid})
    LOGGER.info('---- GETTING ROUND 1 ----')
    subsidiaries_round1 = get_subsidiaries_from_bvdidlist(list_countries1,OEM_bvdid, list_years)
    LOGGER.info('---- DONE GETTING ROUND 1 ----')
    subsidiaries_round1.to_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/subsidiaries.csv',index=False)



def get_subsidiaries_from_bvdidlist(list_countries,bvdid_list, list_years):
    """Return the ownership links whose shareholder is in ``bvdid_list``.

    For every year in ``list_years`` and every country in ``list_countries``
    the file ``Links/by_year_then_country/<year>/csv/
    Links-shareholder-<country>-<year>.csv`` under ``PATHS.orbis`` is read in
    chunks of 2,000,000 rows. A row is kept when its shareholder is in
    ``bvdid_list``, it is not a self-link, and at least one of these holds:

    * direct share above 50,
    * total share above 50,
    * the relation type is one of GUO 50, GUO 50C, DUO 50, DUO 50C, HQ,
    * the ``GUO 25c JO`` column is in ``bvdid_list`` (joint ventures).

    Parameters
    ----------
    list_countries : iterable of str
        Country part of the file names to scan.
    bvdid_list : list of str
        Shareholder BvD IDs to look for.
    list_years : iterable
        Year part of the directory and file names (may include non-numeric
        labels such as 'current'); also stored in the output ``year`` column.

    Returns
    -------
    pandas.DataFrame
        All kept rows with an added ``year`` column. An empty DataFrame is
        returned when nothing matches (previously this raised ``ValueError``
        from ``pd.concat`` on an empty list).
    """
    # Loop invariants, hoisted out of the year/country/chunk loops.
    pct_dtypes = {'Direct % (only figures)':'float','Total % (only figures)':'float'}
    # 'HQ' is included so that branches are picked up.
    # NOTE(review): the earlier comment said the extraction should be rerun
    # with HQ included -- confirm the stored output was regenerated.
    controlling_relations = ['GUO 50','GUO 50C','DUO 50','DUO 50C', 'HQ']
    n_countries = len(list_countries)

    subsidiaries = []
    for yr in list_years:
        LOGGER.info('year: {}'.format(yr))
        dfyear = []
        # start=1 so the progress line reads "1 / N" ... "N / N".
        for i,country in enumerate(list_countries, start=1):
            LOGGER.info('Country: {}. {} / {}'.format(country, i, n_countries))
            links_path = PATHS.orbis / 'Links/by_year_then_country/{}/csv/Links-shareholder-{}-{}.csv'.format(yr,country,yr)
            country_frames = []
            for dfchunk in pd.read_csv(links_path, sep=',', dtype=pct_dtypes, chunksize=2000000):
                # Keep links held by one of the requested shareholders,
                # excluding rows where an entity is listed as owning itself.
                held_by_target = dfchunk['Shareholder BvD ID'].isin(bvdid_list)
                not_self_link = dfchunk['Subsidiary BvD ID']!= dfchunk['Shareholder BvD ID']
                dfchunk = dfchunk[held_by_target & not_self_link]
                if dfchunk.shape[0]==0:
                    continue
                direct_majority = dfchunk['Direct % (only figures)']>50
                total_majority = dfchunk['Total % (only figures)']>50
                controlling = dfchunk['Type of relation'].isin(controlling_relations)
                joint_venture = dfchunk['GUO 25c JO'].isin(bvdid_list) ## to also find joint ventures
                ## note that this will not include an innovative startup in which an OEM has a
                ## substantive but minority share if not recorded explicitly as a joint venture.
                subs = dfchunk.loc[direct_majority|total_majority|controlling|joint_venture]
                country_frames.append(subs)
            if country_frames:
                country_df = pd.concat(country_frames)
                country_df['year'] = yr
                dfyear.append(country_df)
        # A year with no matching rows contributes nothing; pd.concat would
        # raise "No objects to concatenate" on an empty list.
        if dfyear:
            subsidiaries.append(pd.concat(dfyear))
    if not subsidiaries:
        return pd.DataFrame()
    return pd.concat(subsidiaries)

